{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import plotnine as p9\n",
    "from sklearn.datasets import load_digits\n",
    "from scipy.spatial.distance import pdist\n",
    "from sklearn.manifold.t_sne import _joint_probabilities\n",
    "from scipy import linalg\n",
    "from sklearn.metrics import pairwise_distances\n",
    "from scipy.spatial.distance import squareform\n",
    "from sklearn.manifold import TSNE\n",
    "from matplotlib import pyplot as plt\n",
    "import seaborn as sns\n",
    "# Global seaborn/matplotlib defaults: 11.7 x 8.27 inch figures (A4 landscape dimensions).\n",
    "sns.set(rc={'figure.figsize': (11.7, 8.27)})\n",
    "# Ten colours from seaborn's 'bright' palette.\n",
    "palette = sns.color_palette(palette='bright', n_colors=10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: pickle.load can execute arbitrary code; only load embedding files you trust.\n",
    "# bert_data must be loaded as well: a later cell calls plot(bert_data, ...), which\n",
    "# raised NameError on a fresh kernel while this load was commented out.\n",
    "with open('data/tsne/bert_embed.pickle', 'rb') as f:\n",
    "    bert_data = pickle.load(f)\n",
    "with open('data/tsne/moco_embed.pickle', 'rb') as f:\n",
    "    moco_data = pickle.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot(data, exclude=(), n_iter=10000, perplexity=50, mean=True, random_state=None):\n",
    "    \"\"\"Project grouped embeddings to 2-D with t-SNE and return a scatter plot.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : mapping\n",
    "        Maps a label to a sized iterable of embeddings. Each embedding must\n",
    "        support ``.mean(0)`` (when ``mean`` is true) or ``.flatten()``.\n",
    "    exclude : collection, optional\n",
    "        Labels whose embeddings are left out of the projection.\n",
    "    n_iter, perplexity : int\n",
    "        Forwarded unchanged to ``TSNE``.\n",
    "    mean : bool\n",
    "        If true, every embedding is reduced with ``x.mean(0).mean(0)``, i.e.\n",
    "        averaged over its first two axes; otherwise it is flattened to 1-D.\n",
    "    random_state : int or None, optional\n",
    "        Seed forwarded to ``TSNE``. ``None`` (the default) keeps the previous\n",
    "        unseeded behaviour; pass an int to get a reproducible layout.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    A plotnine ggplot with one point per embedding, coloured by 'Method Tag'.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If no embeddings remain after applying ``exclude``.\n",
    "    \"\"\"\n",
    "    labels, vectors = [], []\n",
    "    for key, val in data.items():\n",
    "        if key in exclude:\n",
    "            continue\n",
    "        # One label per embedding so labels stay aligned with the t-SNE rows.\n",
    "        labels.extend([key] * len(val))\n",
    "        vectors.extend(val)\n",
    "    if not vectors:\n",
    "        raise ValueError('no embeddings left to plot after applying exclude')\n",
    "\n",
    "    if mean:\n",
    "        features = [x.mean(0).mean(0) for x in vectors]\n",
    "    else:\n",
    "        features = [x.flatten() for x in vectors]\n",
    "\n",
    "    tsne = TSNE(n_iter=n_iter, perplexity=perplexity, random_state=random_state)\n",
    "    tsne_results = tsne.fit_transform(features)\n",
    "\n",
    "    df = pd.DataFrame(tsne_results, columns=['x', 'y'])\n",
    "    # pandas raises if the lengths differ, so no separate row-count print is needed.\n",
    "    df['Method Tag'] = labels\n",
    "\n",
    "    return (\n",
    "        p9.ggplot(p9.aes('x', 'y'), df)\n",
    "        + p9.geom_point(p9.aes(color='Method Tag'), alpha=0.8)\n",
    "        + p9.theme_classic()\n",
    "    )\n",
    "\n",
    "from tqdm.auto import tqdm\n",
    "n_iter = 10000\n",
    "# One t-SNE run per perplexity; each figure is saved as a PDF and shown inline.\n",
    "for perplexity in tqdm([30, 60, 90, 120], desc='t-SNE perplexity'):\n",
    "    p = plot(moco_data, mean=False, perplexity=perplexity, n_iter=n_iter)\n",
    "    # Saved relative to the working directory, next to the input pickles in data/tsne/,\n",
    "    # instead of a user-specific absolute path.\n",
    "    out_file = f\"data/tsne/transformer_p{perplexity}_n{n_iter}.pdf\"\n",
    "    p.save(out_file)\n",
    "    # A bare `p` inside a loop body is never rendered; display() (provided by the\n",
    "    # IPython kernel) shows each figure.\n",
    "    display(p)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# t-SNE of the BERT embeddings (mean-reduced, the default) without four of the tags.\n",
    "plot(\n",
    "    bert_data,\n",
    "    exclude=['compute', 'sort', 'compress', 'database'],\n",
    "    perplexity=20,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# t-SNE of the flattened MoCo embeddings without four of the tags.\n",
    "plot(\n",
    "    moco_data,\n",
    "    exclude=['compute', 'sort', 'compress', 'database'],\n",
    "    mean=False,\n",
    "    perplexity=90,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
